Load libraries
library(tidyverse)
library(car)
library(MASS)
library(stargazer)
library(effects)
library(ggplot2)
library(MuMIn)
library(spdep)
library(corrplot)
library(gridExtra)
Read in the raw Alachua County tick dataset
ticks <- read.csv("AlachuaTicksAllSites.csv")
head(ticks)
## Sample_Code Collection_Sample Collection_Box gDNA_Sample gDNA_Box
## 1 HW_3May21_AA_A AA1007 AAB13 NymAA103 NymAADNAB1Nat
## 2 HW_13Apr21_IS IS83 IXB2 IS83 ISgDNAB1
## 3 29RD_12Mar21_AA_A AA144 AAB2 NymAA104 NymAADNAB1Nat
## 4 29RD_12Mar21_AA_B AA145 AAB2 NymAA105 NymAADNAB1Nat
## 5 29RD_20Apr21_AA AA639 AAB8 NymAA106 NymAADNAB1Nat
## 6 29RD_17May21_AA_A AA1238 AAB16 NymAA107 NymAADNAB1Nat
## Field_Type Site Site_ID Visit Date Tick_No_Tube Species
## 1 Natural Harmonic Woods HW 9 5/3/21 1 AA
## 2 Natural Harmonic Woods HW 7 4/13/21 1 IS
## 3 Natural 29th Rd 29RD 5 3/12/21 1 AA
## 4 Natural 29th Rd 29RD 5 3/12/21 1 AA
## 5 Natural 29th Rd 29RD 7 4/20/21 1 AA
## 6 Natural 29th Rd 29RD 9 5/17/21 1 AA
## Lifestage Sex Collection_Method
## 1 N <NA> Flagging
## 2 A M Dragging
## 3 N <NA> Dragging
## 4 N <NA> Dragging
## 5 N <NA> Dragging
## 6 N <NA> Dragging
How many individuals of each species and life stage were collected?
ticks %>%
group_by(Species, Lifestage) %>%
dplyr::summarise(total = n())
## # A tibble: 7 × 3
## # Groups: Species [6]
## Species Lifestage total
## <chr> <chr> <int>
## 1 AA A 672
## 2 AA N 1042
## 3 AM N 2
## 4 DV A 18
## 5 HAE N 1
## 6 IA A 1
## 7 IS A 89
How many individuals of each species were collected in natural vs manicured sites?
ticks %>%
group_by(Species, Lifestage, Field_Type) %>%
dplyr::summarise(total = n())
## # A tibble: 12 × 4
## # Groups: Species, Lifestage [7]
## Species Lifestage Field_Type total
## <chr> <chr> <chr> <int>
## 1 AA A Manicured 44
## 2 AA A Natural 628
## 3 AA N Manicured 102
## 4 AA N Natural 940
## 5 AM N Manicured 1
## 6 AM N Natural 1
## 7 DV A Manicured 2
## 8 DV A Natural 16
## 9 HAE N Natural 1
## 10 IA A Natural 1
## 11 IS A Manicured 10
## 12 IS A Natural 79
How many individuals were collected at each visit? First plot combined, and then by species, and then by life stage of each species
visit <- ticks %>%
group_by(Visit) %>%
dplyr::summarise(total = n())
ggplot(data = visit, aes(x=Visit, y=total)) +
theme_classic() +
geom_point() +
labs(title = "tick abundance - all species combined", x = "Visit", y = "# of ticks")
visit_species <- ticks %>%
group_by(Species, Visit) %>%
dplyr::summarise(total = n())
ggplot(data = visit_species, aes(x=Visit, y=total, fill = Species)) +
theme_classic() +
geom_point(aes(color = Species)) +
labs(title = "tick abundance - by species", x = "Visit", y = "# of ticks (adults and nymphs)")
visit_species_lifestage <- ticks %>%
unite("Spp_Lifestage", Species, Lifestage) %>%
group_by(Spp_Lifestage, Visit) %>%
dplyr::summarise(total = n())
ggplot(data = visit_species_lifestage, aes(x=Visit, y=total, fill = Spp_Lifestage)) +
theme_classic() +
geom_point(aes(color = Spp_Lifestage)) +
labs(title = "tick abundance - by species and life stage", x = "Visit", y = "# of ticks")
Make a nice figure for the publication
require(ochRe)
## Loading required package: ochRe
##
## Attaching package: 'ochRe'
## The following object is masked from 'package:spData':
##
## elev
pal_names <- names(ochre_palettes)
ggplot(data = visit_species, aes(x=Visit, y=total, fill = Species)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(title = "", x = "", y = "number of ticks") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
scale_fill_manual(labels = c("A. americanum", "A. maculatum", "D. variabilis",
"H. leporispalustris", "I. affinis", "I. scapularis"),
values = c("#486090", "#786060", "#d8c0a8", "#d8f0f0", "#a890a8", "#f0a860")) # can change colors here
f1 <- ggplot(data = visit_species, aes(x=Visit, y=log1p(total), fill = Species)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(title = "", x = "", y = "log(number of ticks)") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
scale_fill_manual(labels = c("A. americanum", "A. maculatum", "D. variabilis",
"H. leporispalustris", "I. affinis", "I. scapularis"),
values = c("#486090", "#786060", "#d8c0a8", "#d8f0f0", "#a890a8", "#f0a860")) # can change colors here
f1
Make a nice figure that is colorblindness friendly
library(viridisLite)
mycol <- viridisLite::viridis(6)
k1 <- ggplot(data = visit_species, aes(x=Visit, y=log1p(total), fill = Species)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(title = "", x = "", y = "log(number of ticks)") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
scale_fill_manual(labels = c("A. americanum", "A. maculatum", "D. variabilis",
"H. leporispalustris", "I. affinis", "I. scapularis"),
values = mycol) # can change colors here
k1
#ggsave("Figures/TickAbundanceBySession.tiff", dpi = 300, width = 10)
How does each site differ in tick abundance? remember: GA had 0 ticks the entire study
site1 <- ticks %>%
group_by(Site_ID) %>%
dplyr::summarise(total = n())
site1[18,] <- list("GA", 0)
site <- ticks %>%
group_by(Species, Site_ID, Field_Type) %>%
dplyr::summarise(total = n())
site[38,] <- list("AA","GA","Manicured",0)
# sort manicured on left and natural on right
site$Site_ID <- factor(site$Site_ID, levels = c("GA","RP","UG","PC","FC","LL","DF","HT","MSM",
"HW","JM","29RD","SW","HC","SF","PP","SR","MSN"))
p1 <- ggplot(data = site, aes(x=Site_ID, y=total, fill =Species)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(x = "Site", y = "number of ticks") +
scale_fill_manual(labels = c("A. americanum", "A. maculatum", "D. variabilis",
"H. leporispalustris", "I. affinis", "I. scapularis"),
values = c("#486090", "#786060", "#d8c0a8", "#d8f0f0", "#a890a8", "#f0a860"))
p2 <- ggplot(data = site, aes(x=Site_ID, y=log1p(total), fill =Species)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(x = "Site", y = "log(number of ticks)") +
scale_fill_manual(labels = c("A. americanum", "A. maculatum", "D. variabilis",
"H. leporispalustris", "I. affinis", "I. scapularis"),
values = c("#486090", "#786060", "#d8c0a8", "#d8f0f0", "#a890a8", "#f0a860"))
#gridExtra::grid.arrange(p1,p2, ncol=2)
p2
#ggsave("alco_abund_bysite.tiff", dpi = 300)
Let’s work some more with just Amblyomma americanum since it is by far the most abundant species of tick collected during the study
aa <- ticks %>%
filter(Species == "AA")
head(aa)
## Sample_Code Collection_Sample Collection_Box gDNA_Sample gDNA_Box
## 1 HW_3May21_AA_A AA1007 AAB13 NymAA103 NymAADNAB1Nat
## 2 29RD_12Mar21_AA_A AA144 AAB2 NymAA104 NymAADNAB1Nat
## 3 29RD_12Mar21_AA_B AA145 AAB2 NymAA105 NymAADNAB1Nat
## 4 29RD_20Apr21_AA AA639 AAB8 NymAA106 NymAADNAB1Nat
## 5 29RD_17May21_AA_A AA1238 AAB16 NymAA107 NymAADNAB1Nat
## 6 29RD_17May21_AA_B AA1239 AAB16 NymAA108 NymAADNAB1Nat
## Field_Type Site Site_ID Visit Date Tick_No_Tube Species
## 1 Natural Harmonic Woods HW 9 5/3/21 1 AA
## 2 Natural 29th Rd 29RD 5 3/12/21 1 AA
## 3 Natural 29th Rd 29RD 5 3/12/21 1 AA
## 4 Natural 29th Rd 29RD 7 4/20/21 1 AA
## 5 Natural 29th Rd 29RD 9 5/17/21 1 AA
## 6 Natural 29th Rd 29RD 9 5/17/21 1 AA
## Lifestage Sex Collection_Method
## 1 N <NA> Flagging
## 2 N <NA> Dragging
## 3 N <NA> Dragging
## 4 N <NA> Dragging
## 5 N <NA> Dragging
## 6 N <NA> Dragging
Let’s plot the count of A.americanum (nymphs and adults combined) by visit and site
aa_visit <- aa %>%
group_by(Site_ID, Visit) %>%
dplyr::summarise(total = n())
ggplot(data = aa_visit, aes(x=Visit, y=total, col = Site_ID)) +
theme_classic() +
geom_point() +
labs(title = "A.americanum", x = "Visit", y = "# of ticks")
Let’s take a closer look at the seasonality of A.americanum by combining counts from all sites and keeping visits separate
aa_allsites <- aa %>%
group_by(Lifestage, Visit) %>%
dplyr::summarise(total = n())
g1 <- ggplot(data = aa_allsites, aes(x=Visit, y=total, fill = Lifestage)) +
theme_classic() +
geom_point(aes(color = Lifestage)) +
labs(title = "A.americanum", x = "Visit", y = "# of ticks")
g2 <- ggplot(data = aa_allsites, aes(x=Visit, y=total, fill = Lifestage)) +
theme_classic() +
geom_smooth(aes(color = Lifestage)) +
labs(title = "A.americanum", x = "Visit", y = "# of ticks")
gridExtra::grid.arrange(g1, g2, ncol =2)
interesting: Both adults and nymphs of Amblyomma americanum have similar seasonality of peak abundance in Alachua Co. (at least between January and June)
Now let’s combine counts across all visits and keep the sites seperate
aa_allvisits <- aa %>%
group_by(Site_ID, Lifestage) %>%
dplyr::summarize(total = n(),
density = (total/12000) * 100)
## add in rows for the sites and life stages with no observations
aa_allvisits[31,] <- list("29RD", "A", 0 , 0)
aa_allvisits[32,] <- list("HW", "A", 0 , 0)
aa_allvisits[33,] <- list("UG", "A", 0, 0)
aa_allvisits[34,] <- list("UG", "N", 0, 0)
aa_allvisits[35,] <- list("GA", "A", 0, 0)
aa_allvisits[36,] <- list("GA", "N", 0, 0)
aa_allvisits$Site_ID <- factor(aa_allvisits$Site_ID, levels = c("GA","UG","HW","RP","JM","PC","29RD","FC","LL","DF","MSM","HT","HC","SW","SF","PP","SR","MSN"
))
ggplot(data = aa_allvisits, aes(x=Site_ID, y=total, fill = Lifestage)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(title = "A.americanum", x = "Site", y = "# of ticks")
Let’s investigate Ixodes scapularis, as it was the second most abundant tick species in the study and is of great public health interest
is <- ticks %>%
filter(Species == "IS")
head(is)
## Sample_Code Collection_Sample Collection_Box gDNA_Sample gDNA_Box
## 1 HW_13Apr21_IS IS83 IXB2 IS83 ISgDNAB1
## 2 JM_04Feb21_IS IS32 IXB1 IS32 ISgDNAB1
## 3 SW_03Feb21_IS_A IS30 IXB1 IS30 ISgDNAB1
## 4 SW_03Feb21_IS_B IS31 IXB1 IS31 ISgDNAB1
## 5 SR_21Dec20_IS IS01 IXB1 IS01 ISgDNAB1
## 6 SR_18Jan21_IS IS04 IXB1 IS04 ISgDNAB1
## Field_Type Site Site_ID Visit Date Tick_No_Tube Species
## 1 Natural Harmonic Woods HW 7 4/13/21 1 IS
## 2 Natural John Mahon JM 3 2/4/21 1 IS
## 3 Natural Sweetwater SW 3 2/3/21 1 IS
## 4 Natural Sweetwater SW 3 2/3/21 1 IS
## 5 Natural Split Rock SR 1 12/21/20 1 IS
## 6 Natural Split Rock SR 1 1/18/21 1 IS
## Lifestage Sex Collection_Method
## 1 A M Dragging
## 2 A F Dragging
## 3 A F Flagging
## 4 A M On_Person
## 5 A F Flagging
## 6 A F Flagging
Now let’s plot the count of I.scapularis (rembember there were only adults collected) by visit and site
is_visit <- is %>%
group_by(Site_ID, Visit) %>%
dplyr::summarise(total = n())
ggplot(data = is_visit, aes(x=Visit, y=total, col = Site_ID)) +
theme_classic() +
geom_point() +
labs(title = "I.scapularis", x = "Visit", y = "# of ticks")
let’s take a closer look at the seasonality of I.scapularis by combining counts from all sites and keeping visits separate
is_allsites <- is %>%
group_by(Lifestage, Visit) %>%
dplyr::summarise(total = n())
g3 <- ggplot(data = is_allsites, aes(x=Visit, y=total, fill = Lifestage)) +
theme_classic() +
geom_point(aes(color = Lifestage)) +
labs(title = "I.scapularis", x = "Visit", y = "# of ticks")
g4 <- ggplot(data = is_allsites, aes(x=Visit, y=total, fill = Lifestage)) +
theme_classic() +
geom_smooth(aes(color = Lifestage)) +
labs(title = "I.scapularis", x = "Visit", y = "# of ticks")
gridExtra::grid.arrange(g3,g4, ncol=2)
now let’s combine counts across all visits and keep the sites seperate
is_allvisits <- is %>%
group_by(Site_ID, Lifestage) %>%
dplyr::summarize(total = n(),
density = (total/12000) * 100)
## add in rows for the sites and life stages with no observations
is_allvisits[12,] <- list("29RD", "A", 0 , 0)
is_allvisits[13,] <- list("GA", "A", 0, 0)
is_allvisits[14,] <- list("RP", "A", 0, 0)
is_allvisits[15,] <- list("PC", "A", 0, 0)
is_allvisits[16,] <- list("FC", "A", 0, 0)
is_allvisits[17,] <- list("LL", "A", 0, 0)
is_allvisits[18,] <- list("HT", "A", 0, 0)
is_allvisits$Site_ID <- factor(is_allvisits$Site_ID, levels = c("GA","UG","HW","RP","JM","PC","29RD","FC","LL","DF","MSM","HT","HC","SW","SF","PP","SR","MSN"
))
ggplot(data = is_allvisits, aes(x=Site_ID, y=total, fill = Lifestage)) +
theme_classic() +
geom_bar(stat = "identity") +
labs(title = "I.scapularis", x = "Site", y = "# of ticks")
The order of sites is the I.scapularis plot is the same as the A. americanum plot earlier. Just using the eyeball test this shows many of the sites with high A. americanum abundance also have I.scapularis, but there are some deviations from this trend.
Read in the covariate data for all 18 sites This file contains information on if the environment type was manicured or natural, the proportion of forest landcover and the PC1 and PC2 scores at multiple spatial scales (250m, 500m, 1km, 2km, 3km, 4km buffers)
covar <- read.csv("AlCo_SitesCovariates.csv") ## 18 sites
head(covar)
## Site Site_ID Easting Northing Field_Type Forest250m Forest500m
## 1 Reserve Park RP 373075.5 3281598 Manicured 0.0000000 0.0259972
## 2 University Gardens UG 368561.7 3280166 Manicured 0.4059153 0.2280836
## 3 Hogtown Creek HT 370033.7 3285928 Manicured 0.6793893 0.4689848
## 4 Possum Creek Park PC 365990.0 3286586 Manicured 0.4842319 0.3245446
## 5 Fred Cone Park FC 375389.6 3280603 Manicured 0.1525510 0.4584023
## 6 Green Acres Park GA 366138.7 3280294 Manicured 0.6352041 0.5656334
## Forest1km Forest2km Forest3km Forest4km PC1_250m PC2_250m PC1_500m
## 1 0.009046603 0.04787764 0.1523864 0.2497138 3.0888399 1.25403665 -2.2285643
## 2 0.132940615 0.15290895 0.1432444 0.1541731 1.1042450 -2.71890707 -1.6918854
## 3 0.174897316 0.24949040 0.2753919 0.3340898 -0.4720229 0.26182597 -0.1811466
## 4 0.317132210 0.17751376 0.2416931 0.2822197 0.5823618 0.03426844 -0.8718743
## 5 0.415052873 0.40032172 0.4285704 0.4357618 1.0914845 -0.03089397 0.7046526
## 6 0.459594672 0.26329957 0.2377961 0.2197173 -0.2358455 0.56020279 0.3665116
## PC2_500m PC1_1km PC2_1km PC1_2km PC2_2km PC1_3km
## 1 1.14772157 -2.2237817 0.83389005 -2.2345973 0.29634256 -1.6579262
## 2 -4.17393010 -1.9912260 -1.90230643 -1.8760657 -0.08449422 -1.8529678
## 3 0.15115773 -1.4013427 0.04034195 -0.8045536 -0.19268275 -0.5241398
## 4 0.05922524 -0.5187457 0.44283357 -0.8368180 -0.25976343 -0.4419081
## 5 0.75143021 0.2710480 -0.96944716 0.2085390 0.28946092 0.2753296
## 6 0.53614632 0.5772039 0.23898928 -1.2213185 0.06781415 -1.5203214
## PC2_3km PC1_4km PC2_4km
## 1 0.21477736 -1.7101280 -0.04952885
## 2 -0.29752137 -1.8570855 -0.11805117
## 3 0.06247605 -0.1973380 0.09175826
## 4 0.11303452 -0.2691747 0.27311929
## 5 0.36739258 -0.2885019 0.31535917
## 6 -0.17733426 -1.3869067 -0.04092310
explore the relationship between different buffers by plotting them using the package ‘corrplot’
covar_sub <- covar[,c(6:23)]
M <- cor(covar_sub)
#head(round(M,2))
corrplot(M, type = "upper", order = "hclust")
There is high correlation between proportion of forest and PC1 across all scales.
We will need to compare models using individual covariates to determine the appropriate scale and response variable.
join the covariate data to the A.americanum data and summarize total and density by site
aa_var <- aa_allvisits %>%
left_join(covar)
head(aa_var)
## # A tibble: 6 × 26
## # Groups: Site_ID [4]
## Site_ID Lifestage total density Site Easting Northing Field_Type Forest250m
## <chr> <chr> <int> <dbl> <chr> <dbl> <dbl> <chr> <dbl>
## 1 29RD N 6 0.05 "29th … 370183. 3284095. Natural 0.338
## 2 DF A 13 0.108 "Dudle… 350804. 3281711. Manicured 0.442
## 3 DF N 14 0.117 "Dudle… 350804. 3281711. Manicured 0.442
## 4 FC A 4 0.0333 "Fred … 375390. 3280603. Manicured 0.153
## 5 FC N 10 0.0833 "Fred … 375390. 3280603. Manicured 0.153
## 6 HC A 25 0.208 "Hatch… 381436. 3285055. Natural 0.833
## # … with 17 more variables: Forest500m <dbl>, Forest1km <dbl>, Forest2km <dbl>,
## # Forest3km <dbl>, Forest4km <dbl>, PC1_250m <dbl>, PC2_250m <dbl>,
## # PC1_500m <dbl>, PC2_500m <dbl>, PC1_1km <dbl>, PC2_1km <dbl>,
## # PC1_2km <dbl>, PC2_2km <dbl>, PC1_3km <dbl>, PC2_3km <dbl>, PC1_4km <dbl>,
## # PC2_4km <dbl>
aa_combined <- aa_allvisits %>%
group_by(Site_ID) %>%
dplyr::summarise(total = sum(total),
density = sum(density)) %>%
left_join(covar)
head(aa_combined)
## # A tibble: 6 × 25
## Site_ID total density Site Easting Northing Field_Type Forest250m Forest500m
## <chr> <int> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 GA 0 0 "Gree… 366139. 3280294. Manicured 0.635 0.566
## 2 UG 0 0 "Univ… 368562. 3280166. Manicured 0.406 0.228
## 3 HW 1 0.00833 "Harm… 368536. 3280276. Natural 0.290 0.192
## 4 RP 2 0.0167 "Rese… 373075. 3281598. Manicured 0 0.0260
## 5 JM 3 0.025 "John… 365650. 3281144. Natural 0.286 0.134
## 6 PC 5 0.0417 "Poss… 365990. 3286586. Manicured 0.484 0.325
## # … with 16 more variables: Forest1km <dbl>, Forest2km <dbl>, Forest3km <dbl>,
## # Forest4km <dbl>, PC1_250m <dbl>, PC2_250m <dbl>, PC1_500m <dbl>,
## # PC2_500m <dbl>, PC1_1km <dbl>, PC2_1km <dbl>, PC1_2km <dbl>, PC2_2km <dbl>,
## # PC1_3km <dbl>, PC2_3km <dbl>, PC1_4km <dbl>, PC2_4km <dbl>
aa_combined$Field_Type <- as.factor(aa_combined$Field_Type)
here I will work with combined adults and nymphs (adults and nymphs have similar patterns over space and time so it makes sense to combine their analyses)
pres.250m<-glm(total ~ PC1_250m, family = "poisson", data = aa_combined)
pres.500m<-glm(total ~ PC1_500m, family = "poisson", data = aa_combined)
pres.1km<-glm(total ~ PC1_1km, family = "poisson", data = aa_combined)
pres.2km<-glm(total ~ PC1_2km, family = "poisson", data = aa_combined)
pres.3km<-glm(total ~ PC1_3km, family = "poisson", data = aa_combined)
pres.4km<-glm(total ~ PC1_4km, family = "poisson", data = aa_combined)
comb.mod<-model.sel(pres.250m, pres.500m, pres.1km, pres.2km, pres.3km, pres.4km)
comb.mod
## Model selection table
## (Int) PC1_250 PC1_500 PC1_1km PC1_2km PC1_3km PC1_4km family df
## pres.500m 3.525 0.8293 poisson(log) 2
## pres.250m 3.884 -0.8437 poisson(log) 2
## pres.1km 4.062 0.4287 poisson(log) 2
## pres.2km 4.244 0.3055 poisson(log) 2
## pres.3km 4.416 0.1748 poisson(log) 2
## pres.4km 4.474 0.1186 poisson(log) 2
## logLik AICc delta weight
## pres.500m -1016.522 2037.8 0.00 1
## pres.250m -1098.726 2202.3 164.41 0
## pres.1km -1373.671 2752.1 714.30 0
## pres.2km -1489.073 2982.9 945.10 0
## pres.3km -1683.140 3371.1 1333.24 0
## pres.4km -1734.343 3473.5 1435.64 0
## Models ranked by AICc(x)
scales<-c(250,500,1000,2000,3000,4000)
ll<-c(logLik(pres.250m),logLik(pres.500m),logLik(pres.1km),logLik(pres.2km),logLik(pres.3km),logLik(pres.4km))
plot(scales,ll, ylab="Log-likelihood", main = "PC1")
lines(scales,ll)
these results indicate that 500m for PC1 is the best scale for Amblyomma americanum
nb.glm0 <- glm.nb(total ~ 1, data = aa_combined)
nb.glm2 <- glm.nb(total ~ Field_Type, data = aa_combined)
nb.glm11 <- glm.nb(total ~ PC1_500m, data = aa_combined)
nb.glm12 <- glm.nb(total ~ PC1_500m + Field_Type, data = aa_combined)
nb.glm13 <- glm.nb(total ~ PC1_500m * Field_Type, data = aa_combined)
### compare AICc of models
MuMIn::AICc(nb.glm0, nb.glm2, nb.glm11, nb.glm12, nb.glm13)
## df AICc
## nb.glm0 2 187.2285
## nb.glm2 3 181.6323
## nb.glm11 3 174.1242
## nb.glm12 4 173.4902
## nb.glm13 5 176.1766
stargazer(nb.glm0, nb.glm2, nb.glm11, nb.glm12, nb.glm13, type = "text",
intercept.bottom = FALSE,
single.row=FALSE,
notes.append = FALSE,
header=FALSE)
##
## ===============================================================================================================
## Dependent variable:
## ------------------------------------------------------------------------------------
## total
## (1) (2) (3) (4) (5)
## ---------------------------------------------------------------------------------------------------------------
## Constant 4.556*** 2.786*** 3.144*** 2.461*** 2.474***
## (0.405) (0.478) (0.305) (0.377) (0.361)
##
## Field_TypeNatural 2.374*** 1.238** 1.058**
## (0.672) (0.525) (0.532)
##
## PC1_500m:Field_TypeNatural 0.472
## (0.330)
##
## PC1_500m 1.129*** 0.945*** 0.650**
## (0.182) (0.165) (0.259)
##
## ---------------------------------------------------------------------------------------------------------------
## Observations 18 18 18 18 18
## Log Likelihood -92.214 -87.959 -84.205 -82.207 -81.588
## theta 0.341*** (0.100) 0.501*** (0.156) 0.715*** (0.233) 0.899*** (0.310) 0.967*** (0.338)
## Akaike Inf. Crit. 186.429 179.918 172.410 170.413 171.177
## ===============================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
lowest AICc is for PC1 at 500m scale and Field_Type
lowest AICc does not include the interaction term - but many models are within 2 delta AIC
par(mfrow=c(2,2), mar=c(4,4,2,1))
plot(nb.glm12)
sites 12 and 17 are problimatic…
Before we interpret the models, let’s check whether the assumption of independent residuals is violated by spatial autocorrelation in the residuals. To calculate and test Moran’s I, we first need to define neighbours and spatial weights. There are many ways to define weights.
start by defining weights matrix using several graph types
coords <- cbind(aa_combined$Easting, aa_combined$Northing)
colnames(coords) <- c("x", "y")
distmat <- as.matrix(dist(coords))
maxdist <- 2/3 * max(distmat) # this is the maximum distance to consider in correlogram/variogram
nb.gab <- spdep::graph2nb(spdep::gabrielneigh(coords), sym = TRUE)
#plot(nb.gab, coords)
listw.gab <- spdep::nb2listw(nb.gab)
nb.rel <- spdep::graph2nb(spdep::relativeneigh(coords), sym = TRUE)
#plot(nb.rel, coords)
listw.rel <- spdep::nb2listw(nb.rel)
#distance-based neighbors - can change the distance at which to consider
neigh <- dnearneigh(x=coords, d1=0, d2=0.5*maxdist, longlat = F)
#plot(neigh, coords)
listw.neigh <- nb2listw(neigh, style = "W")
inverse distance weights
dlist <- spdep::nbdists(nb.gab, coords)
dlist <- lapply(dlist, function(x) 1/x)
listw.d1 <- spdep::nb2listw(nb.gab, style = "W", glist=dlist)
spdep::lm.morantest(nb.glm12, listw.d1)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## aa_combined, init.theta = 0.899499891, link = log)
## weights: listw.d1
##
## Moran I statistic standard deviate = 1.2648, p-value = 0.103
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.16765296 -0.09688980 0.04374652
test for spatial autocorrelation in explainatory and predictor variables
spdep::moran.test(aa_combined$total, listw.gab)
##
## Moran I test under randomisation
##
## data: aa_combined$total
## weights: listw.gab
##
## Moran I statistic standard deviate = -0.18395, p-value = 0.573
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.08892680 -0.05882353 0.02678138
spdep::moran.test(aa_combined$PC1_500m, listw.gab)
##
## Moran I test under randomisation
##
## data: aa_combined$PC1_500m
## weights: listw.gab
##
## Moran I statistic standard deviate = 3.2639, p-value = 0.0005494
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.55781767 -0.05882353 0.03569368
now test model for autocorrelation in the model residuals
spdep::lm.morantest(nb.glm12, listw.gab)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## aa_combined, init.theta = 0.899499891, link = log)
## weights: listw.gab
##
## Moran I statistic standard deviate = 1.3116, p-value = 0.09483
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.12865654 -0.10474641 0.03166904
spdep::lm.morantest(nb.glm12, listw.rel)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## aa_combined, init.theta = 0.899499891, link = log)
## weights: listw.rel
##
## Moran I statistic standard deviate = 1.4577, p-value = 0.07247
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.20089111 -0.11043964 0.04561667
spdep::lm.morantest(nb.glm12, listw.neigh)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## aa_combined, init.theta = 0.899499891, link = log)
## weights: listw.neigh
##
## Moran I statistic standard deviate = 0.77701, p-value = 0.2186
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.004266432 -0.081010163 0.012045134
no significant spatial autocorrelation in residuals for any of the weight matrices
stargazer(nb.glm12, type = "text",
intercept.bottom = FALSE,
single.row=FALSE,
notes.append = FALSE,
header=FALSE)
##
## =============================================
## Dependent variable:
## ---------------------------
## total
## ---------------------------------------------
## Constant 2.461***
## (0.377)
##
## PC1_500m 0.945***
## (0.165)
##
## Field_TypeNatural 1.238**
## (0.525)
##
## ---------------------------------------------
## Observations 18
## Log Likelihood -82.207
## theta 0.899*** (0.310)
## Akaike Inf. Crit. 170.413
## =============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
summary(nb.glm12)
##
## Call:
## glm.nb(formula = total ~ PC1_500m + Field_Type, data = aa_combined,
## init.theta = 0.899499891, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.30982 -1.12039 -0.37648 0.03739 1.79071
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.4607 0.3766 6.534 6.42e-11 ***
## PC1_500m 0.9446 0.1655 5.709 1.14e-08 ***
## Field_TypeNatural 1.2378 0.5249 2.358 0.0184 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.8995) family taken to be 1)
##
## Null deviance: 54.865 on 17 degrees of freedom
## Residual deviance: 20.811 on 15 degrees of freedom
## AIC: 170.41
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.899
## Std. Err.: 0.310
##
## 2 x log-likelihood: -162.413
pseudo r-squared of model
100*(54.865-20.811)/54.865
## [1] 62.06871
The pseudo r-squared of the top model = 62.1.
df1 <- expand.grid(PC1_500m = c(-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3), Field_Type = c("Natural","Manicured"), total = NA)
df1$total <- predict(nb.glm12, newdata = data.frame(df1), type="response")
#plot
plot <-ggplot(data = aa_combined, aes(x=PC1_500m, y=total, group=Field_Type))+
geom_point(aes(color=Field_Type, size = 2)) +
geom_line(data=df1, size = 2, aes(color=Field_Type)) +
ylab("A.americanum abundance") +
xlab("PC1 at 500m")+
#ggtitle("PC1 and Environment Type as Predictors of tick count")+
theme_bw()+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.8)) +
scale_color_discrete(name = "Environment Type", labels = c("manicured", "natural")) +
scale_size(guide = 'none')
plot.log <-ggplot(data = aa_combined, aes(x=PC1_500m, y=total, group=Field_Type))+
geom_point(aes(color=Field_Type, size =2)) +
geom_line(data=df1, size = 2, aes(color=Field_Type)) +
ylab("A.americanum abundance") +
xlab("PC1 at 500m")+
scale_y_log10() + ## may or may not want to use a log axis....
#ggtitle("PC1 and Environment Type as Predictors of tick count")+
theme_bw()+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.8)) +
scale_color_discrete(name = "Environment Type", labels = c("manicured", "natural")) +
scale_size(guide = 'none')
grid.arrange(plot, plot.log, ncol = 2)
## Warning: Transformation introduced infinite values in continuous y-axis
let’s now remove the two sites (12 and 17) that fall outside model assumptions
aa_screen <- aa_combined[-c(12,17), ]
plot1 <- ggplot(data = aa_combined, aes(x = PC1_500m, y = total)) +
geom_point(aes(col = Field_Type)) +
xlim(-3,3) +
ggtitle("Before")
plot2 <- ggplot(data = aa_screen, aes(x = PC1_500m, y = total)) +
geom_point(aes(col = Field_Type)) +
xlim(-3,3) +
ggtitle("After")
gridExtra::grid.arrange(plot1, plot2, ncol=2)
compared all screened models
nb.glm0.screen <- glm.nb(total ~ 1, data = aa_screen)
nb.glm2.screen <- glm.nb(total ~ Field_Type, data = aa_screen)
nb.glm11.screen <- glm.nb(total ~ PC1_500m, data = aa_screen)
nb.glm12.screen <- glm.nb(total ~ PC1_500m + Field_Type, data = aa_screen)
nb.glm13.screen <- glm.nb(total ~ PC1_500m * Field_Type, data = aa_screen)
AICc(nb.glm0.screen,nb.glm2.screen,nb.glm11.screen,nb.glm12.screen,nb.glm13.screen)
## df AICc
## nb.glm0.screen 2 159.3597
## nb.glm2.screen 3 155.2060
## nb.glm11.screen 3 140.6958
## nb.glm12.screen 4 138.2847
## nb.glm13.screen 5 141.4477
summary(nb.glm12.screen)
##
## Call:
## glm.nb(formula = total ~ PC1_500m + Field_Type, data = aa_screen,
## init.theta = 1.881768818, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.59291 -1.08740 0.00373 0.52410 1.45657
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8675 0.3151 5.927 3.09e-09 ***
## PC1_500m 1.0032 0.1308 7.667 1.75e-14 ***
## Field_TypeNatural 1.2399 0.4132 3.001 0.00269 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.8818) family taken to be 1)
##
## Null deviance: 96.922 on 15 degrees of freedom
## Residual deviance: 18.974 on 13 degrees of freedom
## AIC: 134.65
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.882
## Std. Err.: 0.828
##
## 2 x log-likelihood: -126.648
stargazer(nb.glm12, nb.glm12.screen, type = "text",
intercept.bottom = FALSE,
single.row=FALSE,
notes.append = FALSE,
header=FALSE)
##
## ==================================================
## Dependent variable:
## --------------------------------
## total
## (1) (2)
## --------------------------------------------------
## Constant 2.461*** 1.868***
## (0.377) (0.315)
##
## PC1_500m 0.945*** 1.003***
## (0.165) (0.131)
##
## Field_TypeNatural 1.238** 1.240***
## (0.525) (0.413)
##
## --------------------------------------------------
## Observations 18 16
## Log Likelihood -82.207 -64.324
## theta 0.899*** (0.310) 1.882** (0.828)
## Akaike Inf. Crit. 170.413 134.648
## ==================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
pseudo r-squared of screened model
100*(96.922-18.974)/96.922
## [1] 80.42343
the pseudo r-squared of the screened model = 80.4.
The AIC and LL are also much better for the screened data set. And, the models estimates and significance are minimally affected by the sites that fall outside assumptions.
par(mfrow=c(2,2), mar=c(4,4,2,1))
plot(nb.glm12.screen)
the screened model looks good!
df1 <- expand.grid(PC1_500m = c(-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3), Field_Type = c("Natural","Manicured"), total = NA)
df1$total <- predict(nb.glm12.screen, newdata = data.frame(df1), type="response")
#plot
plot <-ggplot(data = aa_combined, aes(x=PC1_500m, y=total, group=Field_Type))+
geom_point(aes(color=Field_Type, size = 2)) +
geom_line(data=df1, size = 2, aes(color=Field_Type)) +
ylab("A.americanum abundance") +
xlab("PC1 at 500m")+
#ggtitle("PC1 and Environment Type as Predictors of tick count")+
theme_bw()+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.8)) +
scale_color_discrete(name = "Environment Type", labels = c("manicured", "natural")) +
scale_size(guide = 'none')
plot.log <-ggplot(data = aa_combined, aes(x=PC1_500m, y=total, group=Field_Type))+
geom_point(aes(color=Field_Type, size =2)) +
geom_line(data=df1, size = 2, aes(color=Field_Type)) +
ylab("A.americanum abundance") +
xlab("PC1 at 500m")+
scale_y_log10() + ## may or may not want to use a log axis....
#ggtitle("PC1 and Environment Type as Predictors of tick count")+
theme_bw()+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.8)) +
scale_color_discrete(name = "Environment Type", labels = c("manicured", "natural")) +
scale_size(guide = 'none')
grid.arrange(plot, plot.log, ncol = 2)
## Warning: Transformation introduced infinite values in continuous y-axis
get 95%CI for model prediction
## grad the inverse link function
ilink <- family(nb.glm12.screen)$linkinv
## add predition, fit, and se.fit on the **link** scale
ndata <- expand.grid(PC1_500m = c(-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3), Field_Type = c("Manicured", "Natural"))
ndata$total <- predict(nb.glm12.screen, newdata = data.frame(ndata), type = "response")
ndata <- bind_cols(ndata, setNames(as_tibble(predict(nb.glm12.screen, ndata, se.fit = TRUE)[1:2]),
c('fit_link','se_link')))
## create the interval and backtransform
ndata <- mutate(ndata,
fit_resp = ilink(fit_link),
right_upr = ilink(fit_link + (2 * se_link)),
right_lwr = ilink(fit_link - (2 * se_link)))
## show
ndata
## PC1_500m Field_Type total fit_link se_link fit_resp right_upr
## 1 -3.0 Manicured 0.3191457 -1.1421075 0.5648655 0.3191457 0.9876997
## 2 -2.5 Manicured 0.5270281 -0.6405015 0.5109234 0.5270281 1.4642532
## 3 -2.0 Manicured 0.8703190 -0.1388955 0.4599697 0.8703190 2.1837508
## 4 -1.5 Manicured 1.4372198 0.3627105 0.4131116 1.4372198 3.2835784
## 5 -1.0 Manicured 2.3733834 0.8643165 0.3719005 2.3733834 4.9934023
## 6 -0.5 Manicured 3.9193371 1.3659225 0.3384056 3.9193371 7.7116629
## 7 0.0 Manicured 6.4722807 1.8675285 0.3150976 6.4722807 12.1547940
## 8 0.5 Manicured 10.6881383 2.3691346 0.3043259 10.6881383 19.6442812
## 9 1.0 Manicured 17.6500842 2.8707406 0.3074112 17.6500842 32.6407978
## 10 1.5 Manicured 29.1468417 3.3723466 0.3239579 29.1468417 55.7157255
## 11 2.0 Manicured 48.1322564 3.8739526 0.3520732 48.1322564 97.3291833
## 12 2.5 Manicured 79.4842244 4.3755586 0.3892582 79.4842244 173.1355730
## 13 3.0 Manicured 131.2579629 4.8771646 0.4331836 131.2579629 312.1649666
## 14 -3.0 Natural 1.1026948 0.0977570 0.5932716 1.1026948 3.6121396
## 15 -2.5 Natural 1.8209585 0.5993630 0.5362093 1.8209585 5.3216402
## 16 -2.0 Natural 3.0070785 1.1009690 0.4812787 3.0070785 7.8736872
## 17 -1.5 Natural 4.9658030 1.6025750 0.4292991 4.9658030 11.7185514
## 18 -1.0 Natural 8.2003842 2.1041810 0.3814785 8.2003842 17.5866141
## 19 -0.5 Natural 13.5418788 2.6057870 0.3395787 13.5418788 26.7074993
## 20 0.0 Natural 22.3626693 3.1073930 0.3060411 22.3626693 41.2427490
## 21 0.5 Natural 36.9290691 3.6089990 0.2838456 36.9290691 65.1498700
## 22 1.0 Natural 60.9836031 4.1106050 0.2757448 60.9836031 105.8575774
## 23 1.5 Natural 100.7065689 4.6122110 0.2829517 100.7065689 177.3481059
## 24 2.0 Natural 166.3039325 5.1138170 0.3043811 166.3039325 305.6923608
## 25 2.5 Natural 274.6295330 5.6154230 0.3373332 274.6295330 539.2015643
## 26 3.0 Natural 453.5153152 6.1170290 0.3788129 453.5153152 967.4413645
## right_lwr
## 1 0.1031224
## 2 0.1896930
## 3 0.3468597
## 4 0.6290700
## 5 1.1280783
## 6 1.9919444
## 7 3.4464111
## 8 5.8152446
## 9 9.5440521
## 10 15.2477307
## 11 23.8028721
## 12 36.4901436
## 13 55.1908596
## 14 0.3366248
## 15 0.6230955
## 16 1.1484481
## 17 2.1042873
## 18 3.8237208
## 19 6.8663291
## 20 12.1255006
## 21 20.9325997
## 22 35.1321080
## 23 57.1859111
## 24 90.4733043
## 25 139.8760415
## 26 212.5980434
plot it - FIGURE FOR PUBLICATIONS
mycol <- viridisLite::viridis(3)
aa.plt <- ggplot(aa_screen, aes(x = PC1_500m, y = total)) +
geom_point(aes(color=Field_Type), size = 4) +
scale_color_manual(values = mycol, name = "Habitat Type", labels = c("manicured", "natural")) +
geom_line(data = ndata, size =2, aes(color=Field_Type)) +
geom_ribbon(data = ndata,
aes(ymin = right_lwr, ymax = right_upr, color=Field_Type),
alpha = 0.1) +
theme_bw() +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.7)) +
scale_size(guide = 'none') +
ylab("A. americanum abundance") +
xlab("Development Gradient")
#remove the two sites with zeros so that they do not make the plot look funny on the log axis
aa_screen2 <- aa_screen[-c(1,2),]
aa.plt.log <- ggplot(aa_screen2, aes(x = PC1_500m, y = total)) +
geom_point(aes(color=Field_Type), size = 4) +
scale_color_manual(values = mycol) +
geom_line(data = ndata, size =2, aes(color=Field_Type)) +
geom_ribbon(data = ndata,
aes(ymin = right_lwr, ymax = right_upr, color=Field_Type),
alpha = 0.1)+
theme_bw() +
theme(legend.position = "none") +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank()) +
scale_y_log10(labels = scales::number_format(accuracy = 1)) +
ylab("A. americanum abundance") +
xlab("Development Gradient")
grid.arrange(aa.plt, aa.plt.log, ncol = 2)
join the covariate data to the Ixodes scapularis data
is_var <- is_allvisits %>%
left_join(covar)
head(is_var)
## # A tibble: 6 × 26
## # Groups: Site_ID [6]
## Site_ID Lifestage total density Site Easting Northing Field_Type Forest250m
## <chr> <chr> <int> <dbl> <chr> <dbl> <dbl> <chr> <dbl>
## 1 DF A 3 0.025 "Dudle… 350804. 3281711. Manicured 0.442
## 2 HC A 35 0.292 "Hatch… 381436. 3285055. Natural 0.833
## 3 HW A 1 0.00833 "Harmo… 368536. 3280276. Natural 0.290
## 4 JM A 1 0.00833 "John … 365650. 3281144. Natural 0.286
## 5 MSM A 5 0.0417 "Morni… 376482. 3281179. Manicured 0.767
## 6 MSN A 24 0.2 "Morni… 376592. 3281485. Natural 0.737
## # … with 17 more variables: Forest500m <dbl>, Forest1km <dbl>, Forest2km <dbl>,
## # Forest3km <dbl>, Forest4km <dbl>, PC1_250m <dbl>, PC2_250m <dbl>,
## # PC1_500m <dbl>, PC2_500m <dbl>, PC1_1km <dbl>, PC2_1km <dbl>,
## # PC1_2km <dbl>, PC2_2km <dbl>, PC1_3km <dbl>, PC2_3km <dbl>, PC1_4km <dbl>,
## # PC2_4km <dbl>
is_combined <- is_allvisits %>%
group_by(Site_ID) %>%
dplyr::summarise(total = sum(total),
density = sum(density)) %>%
left_join(covar)
head(is_combined)
## # A tibble: 6 × 25
## Site_ID total density Site Easting Northing Field_Type Forest250m Forest500m
## <chr> <int> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 GA 0 0 "Gree… 366139. 3280294. Manicured 0.635 0.566
## 2 UG 2 0.0167 "Univ… 368562. 3280166. Manicured 0.406 0.228
## 3 HW 1 0.00833 "Harm… 368536. 3280276. Natural 0.290 0.192
## 4 RP 0 0 "Rese… 373075. 3281598. Manicured 0 0.0260
## 5 JM 1 0.00833 "John… 365650. 3281144. Natural 0.286 0.134
## 6 PC 0 0 "Poss… 365990. 3286586. Manicured 0.484 0.325
## # … with 16 more variables: Forest1km <dbl>, Forest2km <dbl>, Forest3km <dbl>,
## # Forest4km <dbl>, PC1_250m <dbl>, PC2_250m <dbl>, PC1_500m <dbl>,
## # PC2_500m <dbl>, PC1_1km <dbl>, PC2_1km <dbl>, PC1_2km <dbl>, PC2_2km <dbl>,
## # PC1_3km <dbl>, PC2_3km <dbl>, PC1_4km <dbl>, PC2_4km <dbl>
is_combined$Field_Type <- as.factor(is_combined$Field_Type)
test for “scale of effect” using a buffer based approach for the final go i should probably derive more scales but for now i will work with what there is
pres.250m<-glm(total ~ PC1_250m, family = "poisson", data = is_combined)
pres.500m<-glm(total ~ PC1_500m, family = "poisson", data = is_combined)
pres.1km<-glm(total ~ PC1_1km, family = "poisson", data = is_combined)
pres.2km<-glm(total ~ PC1_2km, family = "poisson", data = is_combined)
pres.3km<-glm(total ~ PC1_3km, family = "poisson", data = is_combined)
pres.4km<-glm(total ~ PC1_4km, family = "poisson", data = is_combined)
comb.mod<-model.sel(pres.250m, pres.500m, pres.1km, pres.2km, pres.3km, pres.4km)
comb.mod
## Model selection table
## (Int) PC1_250 PC1_500 PC1_1km PC1_2km PC1_3km PC1_4km family
## pres.500m 0.4867 0.874 poisson(log)
## pres.1km 0.7729 0.6031 poisson(log)
## pres.2km 0.9631 0.4771 poisson(log)
## pres.250m 1.0190 -0.7698 poisson(log)
## pres.4km 1.2460 0.3047 poisson(log)
## pres.3km 1.2240 0.331 poisson(log)
## df logLik AICc delta weight
## pres.500m 2 -80.669 166.1 0.00 0.997
## pres.1km 2 -86.667 178.1 12.00 0.002
## pres.2km 2 -89.088 183.0 16.84 0.000
## pres.250m 2 -91.228 187.3 21.12 0.000
## pres.4km 2 -102.680 210.2 44.02 0.000
## pres.3km 2 -103.464 211.7 45.59 0.000
## Models ranked by AICc(x)
scales<-c(250,500,1000,2000,3000,4000)
ll<-c(logLik(pres.250m),logLik(pres.500m),logLik(pres.1km),logLik(pres.2km),logLik(pres.3km),logLik(pres.4km))
plot(scales,ll, ylab="Log-likelihood", main = "PC1")
lines(scales,ll)
these results indicate that 500m and 1km for PC1 is the best scale for I.scapularis adults
nb.glm0 <- glm.nb(total ~ 1, data = is_combined)
nb.glm1 <- glm.nb(total ~ Field_Type, data = is_combined)
nb.glm8 <- glm.nb(total ~ PC1_500m, data = is_combined)
nb.glm9 <- glm.nb(total ~ PC1_500m + Field_Type, data = is_combined)
nb.glm10 <- glm.nb(total ~ PC1_500m * Field_Type, data = is_combined)
### compare AICc of models
MuMIn::AICc(nb.glm0, nb.glm1, nb.glm8, nb.glm9, nb.glm10)
## df AICc
## nb.glm0 2 91.54595
## nb.glm1 3 88.92499
## nb.glm8 3 86.52100
## nb.glm9 4 86.69970
## nb.glm10 5 89.86801
stargazer(nb.glm0, nb.glm1, nb.glm8, nb.glm9, nb.glm10, type = "text",
intercept.bottom = FALSE,
single.row=FALSE,
notes.append = FALSE,
header=FALSE)
##
## ==========================================================================================================
## Dependent variable:
## -------------------------------------------------------------------------------
## total
## (1) (2) (3) (4) (5)
## ----------------------------------------------------------------------------------------------------------
## Constant 1.598*** 0.105 0.645 -0.169 -0.052
## (0.438) (0.580) (0.410) (0.531) (0.520)
##
## Field_TypeNatural 2.067*** 1.371** 1.088
## (0.765) (0.666) (0.739)
##
## PC1_500m:Field_TypeNatural 0.404
## (0.458)
##
## PC1_500m 0.752*** 0.620*** 0.354
## (0.242) (0.216) (0.371)
##
## ----------------------------------------------------------------------------------------------------------
## Observations 18 18 18 18 18
## Log Likelihood -44.373 -41.605 -40.403 -38.811 -38.434
## theta 0.308** (0.121) 0.470** (0.205) 0.576** (0.264) 0.802** (0.407) 0.832** (0.422)
## Akaike Inf. Crit. 90.746 87.211 84.807 83.623 84.868
## ==========================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
top model (using AIC) includes both PC1 and field type variable but no interaction term
par(mfrow=c(2,2), mar=c(4,4,2,1))
plot(nb.glm9) #PC1 @ 500m
site 2 could be probematic for PC1
start by defining weights matrix using several graph types (this is a repeat of the same code used for A.americanum)
coords <- cbind(aa_combined$Easting, aa_combined$Northing)
colnames(coords) <- c("x", "y")
distmat <- as.matrix(dist(coords))
maxdist <- 2/3 * max(distmat) # this is the maximum distance to consider in correlogram/variogram
nb.gab <- spdep::graph2nb(spdep::gabrielneigh(coords), sym = TRUE)
#plot(nb.gab, coords)
listw.gab <- spdep::nb2listw(nb.gab)
nb.rel <- spdep::graph2nb(spdep::relativeneigh(coords), sym = TRUE)
#plot(nb.rel, coords)
listw.rel <- spdep::nb2listw(nb.rel)
#distance-based neighbors - can change the distance at which to consider
neigh <- dnearneigh(x=coords, d1=0, d2=0.5*maxdist, longlat = F)
#plot(neigh, coords)
listw.neigh <- nb2listw(neigh, style = "W")
test for spatial autocorrelation in explainatory and predictor variables - gabrial
spdep::moran.test(is_combined$total, listw.gab)
##
## Moran I test under randomisation
##
## data: is_combined$total
## weights: listw.gab
##
## Moran I statistic standard deviate = 2.875, p-value = 0.00202
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.36996484 -0.05882353 0.02224464
spdep::moran.test(is_combined$PC1_500m, listw.gab)
##
## Moran I test under randomisation
##
## data: is_combined$PC1_500m
## weights: listw.gab
##
## Moran I statistic standard deviate = 3.2639, p-value = 0.0005494
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.55781767 -0.05882353 0.03569368
yes there is autocorrelation in the response and predictor variables (using the gabriel weights)
test for spatial autocorrelation in explainatory and predictor variables - relative
spdep::moran.test(is_combined$total, listw.rel)
##
## Moran I test under randomisation
##
## data: is_combined$total
## weights: listw.rel
##
## Moran I statistic standard deviate = 3.8904, p-value = 5.004e-05
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.65430075 -0.05882353 0.03360027
spdep::moran.test(is_combined$PC1_500m, listw.rel)
##
## Moran I test under randomisation
##
## data: is_combined$PC1_500m
## weights: listw.rel
##
## Moran I statistic standard deviate = 2.0962, p-value = 0.01803
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.42816186 -0.05882353 0.05397205
yes there is autocorrelation in the response and predictor variables (using the relative weights)
test for spatial autocorrelation in explainatory and predictor variables - neighbor
spdep::moran.test(is_combined$total, listw.neigh)
##
## Moran I test under randomisation
##
## data: is_combined$total
## weights: listw.neigh
##
## Moran I statistic standard deviate = 0.39793, p-value = 0.3453
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.021024161 -0.058823529 0.009022961
spdep::moran.test(is_combined$PC1_500m, listw.neigh)
##
## Moran I test under randomisation
##
## data: is_combined$PC1_500m
## weights: listw.neigh
##
## Moran I statistic standard deviate = 1.8916, p-value = 0.02928
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.16771013 -0.05882353 0.01434256
there is autocorrelation in the predictor variables, but NOT response (using the neighborhood weights)
now test model for autocorrelation in the residuals - PC1 at 500m
spdep::lm.morantest(nb.glm9, listw.gab)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## is_combined, init.theta = 0.80187564, link = log)
## weights: listw.gab
##
## Moran I statistic standard deviate = 1.6445, p-value = 0.05004
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.14399039 -0.18175250 0.03923694
spdep::lm.morantest(nb.glm9, listw.rel)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## is_combined, init.theta = 0.80187564, link = log)
## weights: listw.rel
##
## Moran I statistic standard deviate = 1.9896, p-value = 0.02332
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.23380474 -0.19068724 0.04552036
spdep::lm.morantest(nb.glm9, listw.neigh)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## is_combined, init.theta = 0.80187564, link = log)
## weights: listw.neigh
##
## Moran I statistic standard deviate = 2.0888, p-value = 0.01836
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.13353396 -0.14203771 0.01740528
there is spatial autocorrelation in model residuals using the relative and neighbor weights
check correlograms for where the spatial autocorrelation may be occuring in the model
add a function to plot correlograms
icorrelogram <- function(locations,z, binsize, maxdist){
distbin <- seq(0,maxdist,by=binsize)
Nbin <- length(distbin)-1
moran.results <- data.frame("dist"= rep(NA,Nbin), "Morans.i"=NA,"null.lcl"=NA, "null.ucl"=NA)
for (i in 1:Nbin){
d.start<-distbin[i]
d.end<-distbin[i+1]
neigh <- dnearneigh(x=locations, d1=d.start, d.end, longlat=F)
wts <- nb2listw(neighbours=neigh, style='B', zero.policy=T)
mor.i <- moran.mc(x=z, listw=wts, nsim=200, alternative="greater", zero.policy=T) #note alternative is for P-value, so only 'significant if positive autocorrelation
moran.results[i, "dist"]<-(d.end+d.start)/2
moran.results[i, "Morans.i"]<-mor.i$statistic #observed moran's i
moran.results[i, "null.lcl"]<-quantile(mor.i$res, probs = 0.025,na.rm = T)#95% null envelope
moran.results[i, "null.ucl"]<-quantile(mor.i$res, probs = 0.975,na.rm = T)#95% null envelope
}
return(moran.results)
}
plot a correlogram for the models using the indicator correlogram function
# save residuals for negative binomial models
nb.glm9.res <- residuals(nb.glm9, type = "deviance")
#models
cor_plot <-icorrelogram(locations=coords, z=nb.glm9.res, binsize=1000,maxdist= 20000)
round(head(cor_plot ,3),2)
## dist Morans.i null.lcl null.ucl
## 1 500 0.17 -0.33 0.38
## 2 1500 -0.12 -0.36 0.30
## 3 2500 -0.19 -0.33 0.26
#plot correlogram
plot(cor_plot$dist, cor_plot$Morans.i, ylim = c(-0.5, 0.5))
abline(h=0, lty = "dashed")
lines(cor_plot$dist, cor_plot$null.lcl)
lines(cor_plot$dist, cor_plot$null.ucl)
there is only spatial autocorrelation at one point and it is at a fairly large lag distance
from what i can see from Fletcher’s book, methods to account for spatial autocorrelation mostly address the issue at small lag distances, not intermediate to large. so i am going to move forward with the model as is.
summary(nb.glm9)
##
## Call:
## glm.nb(formula = total ~ PC1_500m + Field_Type, data = is_combined,
## init.theta = 0.80187564, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6320 -1.1464 -0.7445 0.6170 1.5476
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1689 0.5306 -0.318 0.75019
## PC1_500m 0.6205 0.2160 2.873 0.00406 **
## Field_TypeNatural 1.3714 0.6655 2.061 0.03934 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.8019) family taken to be 1)
##
## Null deviance: 36.215 on 17 degrees of freedom
## Residual deviance: 18.487 on 15 degrees of freedom
## AIC: 83.623
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.802
## Std. Err.: 0.407
##
## 2 x log-likelihood: -75.623
pseudo r-squared of model
100*(36.215-18.487)/36.215
## [1] 48.95209
plot the effects of PC1 and Enviro type
df1 <- expand.grid(PC1_500m = c(-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3), Field_Type = c("Natural","Manicured"), total = NA)
df1$total <- predict(nb.glm9, newdata = data.frame(df1), type="response")
#plot
plot <-ggplot(data = is_combined, aes(x=PC1_500m, y=total, group=Field_Type))+
geom_point(aes(color=Field_Type, size = 2)) +
geom_line(data=df1, size = 2, aes(color=Field_Type)) +
ylab("I. scapularis abundance") +
xlab("PC1 at 500m")+
#ggtitle("PC1 and Environment Type as Predictors of tick count")+
theme_bw()+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.8)) +
scale_color_discrete(name = "Environment Type", labels = c("manicured", "natural"))+
scale_size(guide = 'none')
plot.log <- ggplot(data = is_combined, aes(x=PC1_500m, y=total, group=Field_Type))+
geom_point(aes(color=Field_Type, size =2)) +
geom_line(data=df1, size = 2, aes(color=Field_Type)) +
ylab("I. scapularis abundance") +
xlab("PC1 at 500m")+
scale_y_log10() + ## may or may not want to use a log axis....
#ggtitle("PC1 and Environment Type as Predictors of tick count")+
theme_bw()+
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position = c(0.3,0.8)) +
scale_color_discrete(name = "Environment Type", labels = c("manicured", "natural")) +
scale_size(guide = 'none')
grid.arrange(plot, plot.log, ncol = 2)
## Warning: Transformation introduced infinite values in continuous y-axis
how does removing the sites that were violating models influence things? for now just remove site 2 for PC1 analyses
is_screen <- is_combined[-c(2), ]
plot1 <- ggplot(data = is_combined, aes(x = PC1_500m, y = total)) +
geom_point(aes(col = Field_Type)) +
xlim(-3,3) +
ggtitle("Before")
plot2 <- ggplot(data = is_screen, aes(x = PC1_500m, y = total)) +
geom_point(aes(col = Field_Type)) +
xlim(-3,3) +
ggtitle("After")
gridExtra::grid.arrange(plot1, plot2, ncol=2)
rerun models using the screened dataset
#### compare model results for full and screened glm
nb.glm0.reduced <- glm.nb(total ~ 1, data = is_screen)
nb.glm1.reduced <- glm.nb(total ~ Field_Type, data = is_screen)
nb.glm8.reduced <- glm.nb(total ~ PC1_500m , data = is_screen)
nb.glm9.reduced <- glm.nb(total ~ PC1_500m + Field_Type, data = is_screen)
nb.glm10.reduced <- glm.nb(total ~ PC1_500m * Field_Type, data = is_screen)
AICc(nb.glm0.reduced, nb.glm1.reduced ,nb.glm8.reduced ,nb.glm9.reduced ,nb.glm10.reduced )
## df AICc
## nb.glm0.reduced 2 86.35672
## nb.glm1.reduced 3 84.30552
## nb.glm8.reduced 3 80.97665
## nb.glm9.reduced 4 79.38121
## nb.glm10.reduced 5 83.04529
stargazer(nb.glm8.reduced, nb.glm9.reduced, type = "text",
intercept.bottom = FALSE,
single.row=FALSE,
notes.append = FALSE,
header=FALSE)
##
## =================================================
## Dependent variable:
## -------------------------------
## total
## (1) (2)
## -------------------------------------------------
## Constant 0.361 -0.765
## (0.483) (0.628)
##
## PC1_500m 0.908*** 0.825***
## (0.284) (0.248)
##
## Field_TypeNatural 1.721**
## (0.716)
##
## -------------------------------------------------
## Observations 17 17
## Log Likelihood -37.565 -35.024
## theta 0.570** (0.272) 0.846** (0.429)
## Akaike Inf. Crit. 79.131 76.048
## =================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
par(mfrow=c(2,2), mar=c(4,4,2,1))
plot(nb.glm9.reduced)
the reduced model for PC1_500m is looking pretty good
the PC1 at 500m is an improvement to the original model so i will move forwared using the screened dataset
#must redo weigh matrices to not include site 2
coords.screen <- coords[-2, ]
nb.gab.screen <- spdep::graph2nb(spdep::gabrielneigh(coords.screen), sym = TRUE)
#plot(nb.gab.screen, coords.screen)
listw.gab.screen <- spdep::nb2listw(nb.gab.screen)
nb.rel.screen <- spdep::graph2nb(spdep::relativeneigh(coords.screen), sym = TRUE)
#plot(nb.rel.screen, coords.screen)
listw.rel.screen <- spdep::nb2listw(nb.rel.screen)
#distance-based neighbors
neigh.screen <- dnearneigh(x=coords.screen, d1=0, d2=0.5*maxdist, longlat = F)
#plot(neigh.screen, coords.screen)
listw.neigh.screen <- nb2listw(neigh.screen, style = "W")
spdep::lm.morantest(nb.glm9.reduced, listw.gab.screen)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## is_screen, init.theta = 0.8463850464, link = log)
## weights: listw.gab.screen
##
## Moran I statistic standard deviate = 1.3158, p-value = 0.09411
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.06366157 -0.24622154 0.05546174
spdep::lm.morantest(nb.glm9.reduced, listw.rel.screen)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## is_screen, init.theta = 0.8463850464, link = log)
## weights: listw.rel.screen
##
## Moran I statistic standard deviate = 1.514, p-value = 0.06502
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.09787073 -0.25365580 0.05391202
spdep::lm.morantest(nb.glm9.reduced, listw.neigh.screen)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = total ~ PC1_500m + Field_Type, data =
## is_screen, init.theta = 0.8463850464, link = log)
## weights: listw.neigh.screen
##
## Moran I statistic standard deviate = 1.9585, p-value = 0.02509
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.14264019 -0.17240174 0.02587631
now only the neighboorhood matrix has significant spatial autocorrelation
# save residuals for negative binomial models
nb.glm9.reduced.res <- residuals(nb.glm9.reduced, type = "deviance")
cor_plot <-icorrelogram(locations=coords.screen, z=nb.glm9.reduced.res, binsize=1000,maxdist= 20000)
round(head(cor_plot ,3),2)
## dist Morans.i null.lcl null.ucl
## 1 500 0.07 -0.28 0.27
## 2 1500 -0.14 -0.31 0.35
## 3 2500 -0.06 -0.37 0.27
#plot correlogram
plot(cor_plot$dist, cor_plot$Morans.i, ylim = c(-0.5, 0.5))
abline(h=0, lty = "dashed")
lines(cor_plot$dist, cor_plot$null.lcl)
lines(cor_plot$dist, cor_plot$null.ucl)
only one point barely falls outside the 95% CI so i believe we are good to go
summary(nb.glm9.reduced)
##
## Call:
## glm.nb(formula = total ~ PC1_500m + Field_Type, data = is_screen,
## init.theta = 0.8463850464, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5995 -1.0769 -0.6338 0.4178 1.0562
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7645 0.6276 -1.218 0.223128
## PC1_500m 0.8254 0.2483 3.324 0.000888 ***
## Field_TypeNatural 1.7212 0.7156 2.405 0.016159 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.8464) family taken to be 1)
##
## Null deviance: 37.133 on 16 degrees of freedom
## Residual deviance: 15.421 on 14 degrees of freedom
## AIC: 76.048
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.846
## Std. Err.: 0.429
##
## 2 x log-likelihood: -68.048
pseudo r-squared of model
100*(37.133-15.421)/37.133
## [1] 58.4709
the reduced model has a much better R2 than the original
get model predictions and 95%CI - this is using the screened dataset
## grad the inverse link function
ilink <- family(nb.glm9.reduced)$linkinv
## add predition, fit, and se.fit on the **link** scale
ndata <- expand.grid(PC1_500m = c(-3,-2.5,-2,-1.5,-1,-0.5,0,0.5,1,1.5,2,2.5,3), Field_Type = c("Manicured", "Natural"))
ndata$total <- predict(nb.glm9.reduced, newdata = data.frame(ndata), type = "response")
ndata <- bind_cols(ndata, setNames(as_tibble(predict(nb.glm9.reduced, ndata, se.fit = TRUE)[1:2]),
c('fit_link','se_link')))
## create the interval and backtransform
ndata <- mutate(ndata,
fit_resp = ilink(fit_link),
right_upr = ilink(fit_link + (2 * se_link)),
right_lwr = ilink(fit_link - (2 * se_link)))
## show
ndata
## PC1_500m Field_Type total fit_link se_link fit_resp
## 1 -3.0 Manicured 0.03913889 -3.24063868 1.1445431 0.03913889
## 2 -2.5 Manicured 0.05913360 -2.82795602 1.0393343 0.05913360
## 3 -2.0 Manicured 0.08934291 -2.41527335 0.9387665 0.08934291
## 4 -1.5 Manicured 0.13498513 -2.00259068 0.8444994 0.13498513
## 5 -1.0 Manicured 0.20394437 -1.58990801 0.7588844 0.20394437
## 6 -0.5 Manicured 0.30813251 -1.17722535 0.6851728 0.30813251
## 7 0.0 Manicured 0.46554679 -0.76454268 0.6275730 0.46554679
## 8 0.5 Manicured 0.70337858 -0.35186001 0.5908162 0.70337858
## 9 1.0 Manicured 1.06271043 0.06082266 0.5788864 1.06271043
## 10 1.5 Manicured 1.60561253 0.47350533 0.5932833 1.60561253
## 11 2.0 Manicured 2.42586459 0.88618799 0.6322108 2.42586459
## 12 2.5 Manicured 3.66515513 1.29887066 0.6915387 3.66515513
## 13 3.0 Manicured 5.53755644 1.71155333 0.7665447 5.53755644
## 14 -3.0 Natural 0.21883326 -1.51944520 1.1391423 0.21883326
## 15 -2.5 Natural 0.33062763 -1.10676254 1.0244772 0.33062763
## 16 -2.0 Natural 0.49953387 -0.69407987 0.9123002 0.49953387
## 17 -1.5 Natural 0.75472850 -0.28139720 0.8036539 0.75472850
## 18 -1.0 Natural 1.14029325 0.13128547 0.7001838 1.14029325
## 19 -0.5 Natural 1.72282974 0.54396813 0.6045535 1.72282974
## 20 0.0 Natural 2.60296402 0.95665080 0.5210972 2.60296402
## 21 0.5 Natural 3.93272854 1.36933347 0.4565409 3.93272854
## 22 1.0 Natural 5.94182388 1.78201614 0.4196984 5.94182388
## 23 1.5 Natural 8.97729673 2.19469880 0.4179637 8.97729673
## 24 2.0 Natural 13.56348795 2.60738147 0.4517416 13.56348795
## 25 2.5 Natural 20.49260604 3.02006414 0.5140789 20.49260604
## 26 3.0 Natural 30.96157154 3.43274681 0.5960818 30.96157154
## right_upr right_lwr
## 1 0.3861411 0.003967080
## 2 0.4727033 0.007397415
## 3 0.5840666 0.013666518
## 4 0.7308172 0.024932343
## 5 0.9304014 0.044704691
## 6 1.2130287 0.078271560
## 7 1.6333013 0.132696776
## 8 2.2927967 0.215780767
## 9 3.3824338 0.333887827
## 10 5.2596891 0.490141442
## 11 8.5900932 0.685070449
## 12 14.6135331 0.919241227
## 13 25.6525457 1.195379660
## 14 2.1357962 0.022421613
## 15 2.5655987 0.042607843
## 16 3.0972682 0.080565865
## 17 3.7656125 0.151267583
## 18 4.6258169 0.281089527
## 19 5.7723257 0.514202148
## 20 7.3805288 0.918013044
## 21 9.8003226 1.578147417
## 22 13.7551441 2.566695832
## 23 20.7101981 3.891409264
## 24 33.4772009 5.495328181
## 25 57.2954879 7.329493436
## 26 101.9936405 9.398810624
plot it
mycol <- viridisLite::viridis(3)
is.plt <- ggplot(is_screen, aes(x = PC1_500m, y = total)) +
geom_point(aes(color=Field_Type), size = 4) +
scale_color_manual(values = mycol, name = "Habitat Type", labels = c("manicured", "natural")) +
geom_line(data = ndata, size =2, aes(color=Field_Type)) +
geom_ribbon(data = ndata,
aes(ymin = right_lwr, ymax = right_upr, color=Field_Type),
alpha = 0.1)+
theme_bw() +
theme(legend.position = "none") +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank()) +
scale_size(guide = 'none') +
ylab("I. scapularis abundance") +
xlab("Development Gradient")
is_screen2 <- is_screen[-c(1,3,5,6,7,8,11),]
is.plt.log <- ggplot(is_screen2, aes(x = PC1_500m, y = total)) +
geom_point(aes(color=Field_Type), size = 4) +
scale_color_manual(values = mycol, name = "Habitat Type", labels = c("manicured", "natural")) +
geom_line(data = ndata, size =2, aes(color=Field_Type)) +
geom_ribbon(data = ndata,
aes(ymin = right_lwr, ymax = right_upr, color=Field_Type),
alpha = 0.1)+
theme_bw() +
theme(legend.position = "none") +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank()) +
scale_size(guide = 'none') +
scale_y_log10(labels = scales::number_format(accuracy = 1)) +
ylab("I. scapularis abundance") +
xlab("Development Gradient")
grid.arrange(is.plt, is.plt.log, ncol = 2)
put all the plots together…
grid.arrange(aa.plt, aa.plt.log, is.plt, is.plt.log, ncol = 2)
library(cowplot)
final_plot <- ggdraw()+
draw_plot(aa.plt, x=0, y=0.5, width = 0.5, height =0.5)+
draw_plot(aa.plt.log, x=0.5, y=0.5, width = 0.5, height =0.5)+
draw_plot(is.plt,x=0, y=0, width = 0.5, height =0.5 )+
draw_plot(is.plt.log, x=0.5, y=0, width = 0.5, height =0.5)+
draw_plot_label(c("A","B","C","D"), c(0, 0.5, 0, 0.5), c(1,1,0.5,0.5), size=10)
final_plot
#ggsave("Figures/TickAbundanceModels.tiff", dpi = 300)